capture program drop FiltroDatos
program define FiltroDatos `W'
* W es lo que define el ancho de la ventana.
drop if abs(totmuj-20)>`1'
gen n = 1
gen above           = totmuj>=20
forvalues i = 1/9 {
gen totmuj`i'         = (totmuj-20)^`i'
gen totmuj`i'_above   = (totmuj-20)^`i'*above
}
end

********************************************************************************
* Sectors robustness: adding sectors by their M/W ratios. Main graph.
********************************************************************************

use DTA/Datos_ENIA_95_07_Processed.dta, clear

global Polinomio totmuj1 totmuj2 totmuj1_above totmuj2_above above

********************************************************************************

global Sectors_Rankings wm_order // kw_order kwage_order hi_lo_order wagel_order val_order size_order

label var wm_order "X Most Female Intensive Sectors"

matrix beta = J(115,1,.)
matrix se   = J(115,1,.)
matrix N    = J(115,1,.)

matrix prop_tottrab = J(115,1,.)
matrix prop_women   = J(115,1,.)
matrix prop_firms   = J(115,1,.)

estimates clear

forvalues i = 2/22 { 

qui sum prop_workers_sector if wm_order <= `i'
local Workers_wm_order`i' = r(mean)
qui sum prop_women_sector if wm_order <= `i'
local Women_wm_order`i' = r(mean)
qui sum prop_firms_sector if wm_order <= `i'
local Firms_wm_order`i' = r(mean)
matrix prop_tottrab[`i',1] = `Workers_wm_order`i''
matrix prop_women[`i',1]   = `Women_wm_order`i''
matrix prop_firms[`i',1]   = `Firms_wm_order`i''

preserve
* Keep only the most intensive sectors in women, ranked by their average M/W.
gen high_intensity = wm_order <= `i'
qui FiltroDatos 10
//if _N >= 30 {
collapse (sum) n, by($Polinomio high_intensity)
gen log_n = log(n)

reg log_n $Polinomio if high_intensity==1
matrix beta[`i',1] = _b[above]
matrix se[`i',1]   = _se[above]
matrix N[`i',1]   = `i'

restore

}

svmat N, name(ejeX1)
svmat beta, name(beta)
svmat se, name(errEst)

gen upper = beta + 1.96*errEst
gen lower = beta - 1.96*errEst

svmat prop_tottrab, name(prop_tottrab_graph)
svmat prop_women, name(prop_women_graph)
svmat prop_firms, name(prop_firms_graph)
label var prop_firms_graph "Proportion of Firms"
label var ejeX1 "Sector Ranking by M/W intensity"
label var beta "Estimated Bunching"

twoway (line beta upper lower ejeX1, yline(0) lcolor(blue gray gray) lpattern(solid dash dash)) (scatter prop_firms_graph ejeX1, yaxis(2) mcolor(maroon%50) msize(vsmall)) ///
 , name(sector_robustness_wm_order, replace) title("Bunching by Sector Average Female Intensity")
 
graph save GRAPHS/sector_robustness_wm_order, replace


********************************************************************************
* Sectors robustness: adding firms by their size. Main graph.
********************************************************************************

use DTA/Datos_ENIA_95_07_Processed.dta, clear

global Polinomio totmuj1 totmuj2 totmuj1_above totmuj2_above above

********************************************************************************

global Sectors_Rankings size_order

label var size_order "X Largest firms in terms of employment"

matrix beta = J(115,1,.)
matrix se   = J(115,1,.)
matrix N    = J(115,1,.)

matrix prop_tottrab = J(115,1,.)
matrix prop_women   = J(115,1,.)
matrix prop_firms   = J(115,1,.)

estimates clear

forvalues i = 2/22 { 

qui sum prop_workers_sector if size_order >= `i'
local Workers_wm_order`i' = r(mean)
qui sum prop_women_sector if size_order >= `i'
local Women_wm_order`i' = r(mean)
qui sum prop_firms_sector if size_order >= `i'
local Firms_wm_order`i' = r(mean)
matrix prop_tottrab[`i',1] = `Workers_wm_order`i''
matrix prop_women[`i',1]   = `Women_wm_order`i''
matrix prop_firms[`i',1]   = `Firms_wm_order`i''

preserve
* Keep only the most intensive sectors in women, ranked by their average M/W.
gen high_intensity = size_order >= `i'
qui FiltroDatos 10
//if _N >= 30 {
collapse (sum) n, by($Polinomio high_intensity)
gen log_n = log(n)

reg log_n $Polinomio if high_intensity==1
matrix beta[`i',1] = _b[above]
matrix se[`i',1]   = _se[above]
matrix N[`i',1]   = `i'

restore

}

svmat N, name(ejeX1)
svmat beta, name(beta)
svmat se, name(errEst)

gen upper = beta + 1.96*errEst
gen lower = beta - 1.96*errEst

svmat prop_tottrab, name(prop_tottrab_graph)
svmat prop_women, name(prop_women_graph)
svmat prop_firms, name(prop_firms_graph)
label var prop_firms_graph "Proportion of Firms"
label var ejeX1 "Number of employees considered for large firms"
label var beta "Estimated Bunching"

twoway (line beta upper lower ejeX1, yline(0) lcolor(blue gray gray) lpattern(solid dash dash)) (scatter prop_firms_graph ejeX1, yaxis(2) mcolor(maroon%50) msize(vsmall)) ///
 , name(sector_robustness_wm_order, replace) title("Different Cut-Offs for firm size")
 
graph save GRAPHS/sector_robustness_size_order, replace

********************************************************************************
* Sectors robustness: adding sectors by firm size
********************************************************************************


use DTA/Datos_ENIA_95_07_Processed.dta, clear

global Polinomio totmuj1 totmuj2 totmuj1_above totmuj2_above above

********************************************************************************

matrix beta = J(115,1,.)
matrix se   = J(115,1,.)
matrix N    = J(115,1,.)

matrix prop_tottrab = J(115,1,.)
matrix prop_women   = J(115,1,.)
matrix prop_firms   = J(115,1,.)

estimates clear

forvalues i = 3/30 { 

local size = `i'*10
if `i' > 10 {
qui sum prop_workers_sector if tottrab <= `size'
local Workers_tottrab`i' = r(mean)
qui sum prop_women_sector if tottrab <= `size'
local Women_tottrab`i' = r(mean)
qui sum prop_firms_sector if tottrab <= `size'
local Firms_tottrab`i' = r(mean)
matrix prop_tottrab[`i',1] = prop_tottrab[`i'-1, 1] + `Workers_tottrab`i''
matrix prop_women[`i',1]   = prop_women[`i'-1, 1]   + `Women_tottrab`i''
matrix prop_firms[`i',1]   = prop_firms[`i'-1, 1]   + `Firms_tottrab`i''
}
else {
qui sum prop_workers_sector if tottrab <= `size'
local Workers_tottrab`i' = r(mean)
qui sum prop_women_sector if tottrab <= `size'
local Women_tottrab`i' = r(mean)
qui sum prop_firms_sector if tottrab <= `size'
local Firms_tottrab`i' = r(mean)
matrix prop_tottrab[`i',1] = `Workers_tottrab`i''
matrix prop_women[`i',1]   = `Women_tottrab`i''
matrix prop_firms[`i',1]   = `Firms_tottrab`i''
}

preserve
* Keep only the most intensive sectors in women, ranked by their average M/W.
gen high_intensity = tottrab <= `size'
qui FiltroDatos 10
if _N >= 30 {
collapse (sum) n, by($Polinomio high_intensity)
gen log_n = log(n)

reg log_n i.high_intensity##i.above##c.totmuj1 i.high_intensity##i.above##c.totmuj2
matrix beta[`i',1] = _b[1.high_intensity#1.above]
matrix se[`i',1]   = _se[1.high_intensity#1.above]
matrix N[`i',1]    = `size'

}
restore

}

svmat N, name(ejeX1)
svmat beta, name(beta1)
svmat se, name(errEst)
gen upper = beta1 + 1.96*errEst
gen lower = beta1 - 1.96*errEst

svmat prop_tottrab, name(prop_tottrab_graph)
svmat prop_women, name(prop_women_graph)
svmat prop_firms, name(prop_firms_graph)
label var prop_firms_graph "Proportion of Firms"

twoway (line beta1 upper lower ejeX1, yline(0) ytitle("Difference in Estimated Bunching") legend(order(1 "Coefficients Difference" 3 "5% CI")) lcolor(blue gray gray) lpattern(solid dash dash)) (scatter prop_firms_graph ejeX1, yaxis(2) mcolor(maroon%50) msize(vsmall)) ///
 , name(robustness_size, replace) title("Different Cut-offs for Firm Size")
 graph export GRAPHS/robustness_size.pdf, replace